Setup the Seurat Object

library(Seurat)
## Loading required package: ggplot2
## Loading required package: cowplot
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
## Loading required package: Matrix
# Load all 4 datasets
cel.data <- read.table("/Volumes/Passport/scRNAseq/IntegrationExample/Example2_4sets/Data/pancreas_multi_celseq_expression_matrix.txt.gz", 
    sep = "\t")
cel2.data <- read.table("/Volumes/Passport/scRNAseq/IntegrationExample/Example2_4sets/Data/pancreas_multi_celseq2_expression_matrix.txt.gz", 
    sep = "\t")
fluid.data <- read.table("/Volumes/Passport/scRNAseq/IntegrationExample/Example2_4sets/Data/pancreas_multi_fluidigmc1_expression_matrix.txt.gz", 
    sep = "\t")
smart.data <- read.table("/Volumes/Passport/scRNAseq/IntegrationExample/Example2_4sets/Data/pancreas_multi_smartseq2_expression_matrix.txt.gz", 
    sep = "\t")
# Initialize the Seurat object with the raw (non-normalized data).  Set up
# cel object
cel <- CreateSeuratObject(raw.data = cel.data, project = "cel", min.cells = 5)
cel@meta.data$stim <- "cel"
cel <- FilterCells(cel, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf)
cel <- NormalizeData(cel)
cel <- ScaleData(cel, display.progress = F)
# Set up cel1 object
cel2 <- CreateSeuratObject(raw.data = cel2.data, project = "cel2", min.cells = 5)
cel2@meta.data$stim <- "cel2"
cel2 <- FilterCells(cel2, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf)
cel2 <- NormalizeData(cel2)
cel2 <- ScaleData(cel2, display.progress = F)
# Set up fluid object
fluid <- CreateSeuratObject(raw.data = fluid.data, project = "IMMUNE_CTRL", 
    min.cells = 5)
fluid@meta.data$stim <- "fluid"
fluid <- FilterCells(fluid, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf)
fluid <- NormalizeData(fluid)
fluid <- ScaleData(fluid, display.progress = F)
# Set up smart object
smart <- CreateSeuratObject(raw.data = smart.data, project = "IMMUNE_STIM", 
    min.cells = 5)
smart@meta.data$stim <- "smart"
smart <- FilterCells(smart, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf)
smart <- NormalizeData(smart)
smart <- ScaleData(smart, display.progress = F)
# Gene selection for input to CCA
cel <- FindVariableGenes(cel, do.plot = F)
cel2 <- FindVariableGenes(cel2, do.plot = F)
fluid <- FindVariableGenes(fluid, do.plot = F)
smart <- FindVariableGenes(smart, do.plot = F)
g.1 <- head(rownames(cel@hvg.info), 1000)
g.2 <- head(rownames(cel2@hvg.info), 1000)
g.3 <- head(rownames(fluid@hvg.info), 1000)
g.4 <- head(rownames(smart@hvg.info), 1000)
genes.use <- unique(c(g.1, g.2, g.3, g.4))
genes.use <- intersect(genes.use, rownames(cel@scale.data))
genes.use <- intersect(genes.use, rownames(cel2@scale.data))
genes.use <- intersect(genes.use, rownames(fluid@scale.data))
genes.use <- intersect(genes.use, rownames(smart@scale.data))

Perform a canonical correlation analysis (CCA)

sample.list <- list(cel, cel2, fluid, smart)
immune.combined <- RunMultiCCA(object.list = sample.list, genes.use = genes.use, 
    num.ccs = 30)
# explore the datasets
p1 <- DimPlot(object = immune.combined, reduction.use = "cca", group.by = "stim", 
    pt.size = 0.5, do.return = TRUE)
p2 <- VlnPlot(object = immune.combined, features.plot = "CC1", group.by = "stim", 
    do.return = TRUE)
plot_grid(p1, p2)

PrintDim(object = immune.combined, reduction.type = "cca", dims.print = 1:2, 
    genes.print = 10)
## [1] "CC1"
##  [1] "SCG5"    "CPE"     "CHGB"    "SCGN"    "TTR"     "SCG3"    "SLC30A8"
##  [8] "PCSK2"   "SCG2"    "PTPRN"  
## [1] ""
##  [1] "IFITM3"   "SERPINA3" "ZFP36L1"  "TACSTD2"  "TM4SF1"   "CD44"    
##  [7] "SAT1"     "CFB"      "IL32"     "ANXA4"   
## [1] ""
## [1] ""
## [1] "CC2"
##  [1] "HADH"    "INS"     "SCD5"    "NPTX2"   "ADCYAP1" "SORL1"   "PCSK1"  
##  [8] "SYT13"   "IAPP"    "PDX1"   
## [1] ""
##  [1] "GCG"      "TM4SF4"   "GC"       "FAP"      "LOXL4"    "IRX2"    
##  [7] "GLS"      "TMEM176B" "PLCE1"    "CRYBA2"  
## [1] ""
## [1] ""
p3 <- MetageneBicorPlot(immune.combined, grouping.var = "stim", dims.eval = 1:30, 
    display.progress = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

DimHeatmap(object = immune.combined, reduction.type = "cca", cells.use = 500, 
    dim.use = 1:9, do.balanced = TRUE)

# Align the CCA subspaces

immune.combined <- AlignSubspace(immune.combined, reduction.type = "cca", grouping.var = "stim", 
    dims.align = 1:20)
## Scaling data matrix
## Scaling data matrix
## Scaling data matrix
## Scaling data matrix
p1 <- VlnPlot(object = immune.combined, features.plot = "ACC1", group.by = "stim", 
    do.return = TRUE)
p2 <- VlnPlot(object = immune.combined, features.plot = "ACC2", group.by = "stim", 
    do.return = TRUE)
plot_grid(p1, p2)

# Perform an integrated analysis

# run t-SNE and find clusters
immune.combined <- RunTSNE(immune.combined, reduction.use = "cca.aligned", dims.use = 1:20, 
    do.fast = T)
immune.combined <- FindClusters(immune.combined, reduction.type = "cca.aligned", 
    resolution = 0.6, dims.use = 1:20)
p1 <- TSNEPlot(immune.combined, do.return = T, pt.size = 0.5, group.by = "stim")
p2 <- TSNEPlot(immune.combined, do.label = T, do.return = T, pt.size = 0.5)
plot_grid(p1, p2)

# Identify conserved cell type markers
nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 7, grouping.var = "stim", 
    print.bar = FALSE)
## Warning in if (!ident.use.2 %in% object@ident) {: the condition has length
## > 1 and only the first element will be used

## Warning in if (!ident.use.2 %in% object@ident) {: the condition has length
## > 1 and only the first element will be used

## Warning in if (!ident.use.2 %in% object@ident) {: the condition has length
## > 1 and only the first element will be used

## Warning in if (!ident.use.2 %in% object@ident) {: the condition has length
## > 1 and only the first element will be used
head(nk.markers)
FeaturePlot(object = immune.combined, features.plot = rownames(nk.markers)[1:6], 
    min.cutoff = "q9", cols.use = c("lightgrey", "blue"), pt.size = 0.5)

Determine cell types of those clusters

# determine the cell type name
new.ident <- c("Cell type 1", "Cell type 2", "Cell type 3", "Cell type 4", "Cell type 5", 
    "Cell type 6", "Cell type 7", "Cell type 8", "Cell type 9", "Cell type 10", 
    "Cell type 11", "Cell type 12")
# rename
for (i in 0:11) {
    immune.combined <- RenameIdent(object = immune.combined, old.ident.name = i, 
        new.ident.name = new.ident[i + 1])
}
# plot with new name label
TSNEPlot(immune.combined, do.label = T, pt.size = 0.5)

immune.combined@ident <- factor(immune.combined@ident, levels = new.ident)
markers.to.plot <- rownames(nk.markers)[1:20]
sdp <- SplitDotPlotGG(immune.combined, genes.plot = rev(markers.to.plot), cols.use = c("blue", 
    "red", "green", "yellow"), x.lab.rot = T, plot.legend = T, dot.scale = 8, 
    do.return = T, grouping.var = "stim")

Identify differential expressed genes across conditions

# LabelPoint <- function(plot, genes, exp.mat, adj.x.t = 0, adj.y.t = 0,
# adj.x.s = 0, adj.y.s = 0, text.size = 2.5, segment.size = 0.1) { for (i in
# genes) { x1 <- exp.mat[i, 1] y1 <- exp.mat[i, 2] plot <- plot +
# annotate('text', x = x1 + adj.x.t, y = y1 + adj.y.t, label = i, size =
# text.size) plot <- plot + annotate('segment', x = x1 + adj.x.s, xend = x1,
# y = y1 + adj.y.s, yend = y1, size = segment.size) } return(plot) }

# LabelUR <- function(plot, genes, exp.mat, adj.u.t = 0.1, adj.r.t = 0.15,
# adj.u.s = 0.05, adj.r.s = 0.05, ...) { return(LabelPoint(plot, genes,
# exp.mat, adj.y.t = adj.u.t, adj.x.t = adj.r.t, adj.y.s = adj.u.s, adj.x.s
# = adj.r.s, ...)) }

# LabelUL <- function(plot, genes, exp.mat, adj.u.t = 0.1, adj.l.t = 0.15,
# adj.u.s = 0.05, adj.l.s = 0.05, ...) { return(LabelPoint(plot, genes,
# exp.mat, adj.y.t = adj.u.t, adj.x.t = -adj.l.t, adj.y.s = adj.u.s, adj.x.s
# = -adj.l.s, ...)) }


# t.cells <- SubsetData(immune.combined, ident.use = 'CD4 Naive T',
# subset.raw = T) t.cells <- SetAllIdent(t.cells, id = 'stim') avg.t.cells
# <- log1p(AverageExpression(t.cells, show.progress = FALSE))
# avg.t.cells$gene <- rownames(avg.t.cells)

# cd14.mono <- SubsetData(immune.combined, ident.use = 'CD14 Mono',
# subset.raw = T) cd14.mono <- SetAllIdent(cd14.mono, id = 'stim')
# avg.cd14.mono <- log1p(AverageExpression(cd14.mono, show.progress =
# FALSE)) avg.cd14.mono$gene <- rownames(avg.cd14.mono)

# genes.to.label1 = c('ISG15', 'LY6E', 'IFI6', 'ISG20', 'MX1')
# genes.to.label2 = c('IFIT2', 'IFIT1') genes.to.label3 = c('CXCL10',
# 'CCL8') p1 <- ggplot(avg.t.cells, aes(CTRL, STIM)) + geom_point() +
# ggtitle('CD4 Naive T Cells') p1 <- LabelUR(p1, genes = c(genes.to.label1,
# genes.to.label2), avg.t.cells, adj.u.t = 0.3, adj.u.s = 0.23) p1 <-
# LabelUL(p1, genes = genes.to.label3, avg.t.cells, adj.u.t = 0.5, adj.u.s =
# 0.4, adj.l.t = 0.25, adj.l.s = 0.25) p2 <- ggplot(avg.cd14.mono, aes(CTRL,
# STIM)) + geom_point() + ggtitle('CD14 Monocytes') p2 <- LabelUR(p2, genes
# = c(genes.to.label1, genes.to.label3), avg.cd14.mono, adj.u.t = 0.3,
# adj.u.s = 0.23) p2 <- LabelUL(p2, genes = genes.to.label2, avg.cd14.mono,
# adj.u.t = 0.5, adj.u.s = 0.4, adj.l.t = 0.25, adj.l.s = 0.25)
# plot_grid(p1, p2)


# immune.combined@meta.data$celltype.stim <- paste0(immune.combined@ident,
# '_', immune.combined@meta.data$stim) immune.combined <-
# StashIdent(immune.combined, save.name = 'celltype') immune.combined <-
# SetAllIdent(immune.combined, id = 'celltype.stim') b.interferon.response
# <- FindMarkers(immune.combined, ident.1 = 'B_STIM', ident.2 = 'B_CTRL',
# print.bar = FALSE) head(b.interferon.response, 15)

# FeatureHeatmap(immune.combined, features.plot = c('CD3D', 'GNLY', 'IFI6',
# 'ISG15', 'CD14', 'CXCL10'), group.by = 'stim', pt.size = 0.25,
# key.position = 'top', max.exp = 3)
# saveRDS(immune.combined, file = 'immune_combined.rds')